# Direct outputs eligible for public release

# GLOBAL SETTINGS --------------------------------------------------------------

options(
  scipen = 999,
  digits = 16,
  max.print = .Machine$integer.max,
  show.error.locations = TRUE,
  warn = 1
)

RNGkind("L'Ecuyer-CMRG")
seed <- 818675309L
set.seed(seed) # setting main seed

# PACKAGES ---------------------------------------------------------------------
library(data.table)
library(ggplot2)
library(scales)
library(grid)
library(pBrackets)

# PACKAGE SETTINGS -------------------------------------------------------------

# data.table
setDTthreads(threads = 1L)
options(datatable.print.class = TRUE, datatable.print.keys = TRUE)
# so that printing the data.table also shows the variable type on top

# BEGIN FILE -------------------------------------------------------------------

# Just in case something masks 'source()', use extra deliberate base::source().
base::source("~/code/0-utility-functions/graphing-utils.R", local = TRUE)
base::source("~/code/0-utility-functions/table-and-text-utils.R", local = TRUE)

# Produce Figure 3.1a - first-difference (FD) estimator with random cohort

fd_cohort_levels <-
    setDT(
        readRDS("~/estimation-output/single-cohort-fd-results.rds")
        )[estimate_type == "level"]

fd_cohort_pre_line_val <- fd_cohort_levels[!is.na(pre_line)]$pre_line[[1]]
fd_cohort_post_line_val <- fd_cohort_levels[!is.na(post_line)]$post_line[[1]]

fd_cohort_pct_change <-
    (fd_cohort_post_line_val - fd_cohort_pre_line_val) / fd_cohort_pre_line_val

figure_3_1_a <-
    ggplot(data = fd_cohort_levels, aes(x = event_time, y = estimate)) +
    geom_line() +
    geom_line(
        aes(y = pre_line),
        linetype = "dotted",
        show.legend = FALSE,
        color = "blue"
    ) +
    geom_line(
        aes(y = post_line),
        linetype = "dotted",
        show.legend = FALSE,
        color = "blue"
    ) +
    theme_bw(base_size = 11) +
    theme(
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank()
    ) +
    scale_x_continuous(
        breaks = seq.int(from = -4L, to = 5L, by = 1L),
        expand = expansion(mult = c(0.0025, 0.0025))
    ) +
    scale_y_continuous(
        breaks = seq.int(from = 27000L, to = 36000L, by = 1000L)
    ) +
    labs(
        x = "Event Time (ℓ)",
        y = "Mean Winner Wage Earnings (2016 USD)"
    ) +
    coord_cartesian(ylim = c(27000, 36000)) +
    annotation_custom(
        BracketsGrob(
            0.4475, 0.30, 0.4475, 0.6175,
            h = -0.05,
            lwd = 0.5,
            col = "blue"
        )
    ) +
    annotate(
        "text",
        x = 1.125,
        y = 31100,
        size = 3L,
        label =
            sprintf(
                "FD Estimate\n(Single Cohort)\n%s",
                round(
                    (fd_cohort_post_line_val - fd_cohort_pre_line_val),
                    digits = 0L
                )
            )
    )

ggsave(
    plot = figure_3_1_a,
    filename = "~/paper/figures/figure-3.1a.png",
    width = 6,
    height = 4,
    dpi = 600
)

ggsave(
    plot = figure_3_1_a,
    filename = "~/paper/figures/figure-3.1a.tif",
    width = 6,
    height = 4,
    device = "tiff",
    dpi = 600
)

print(
    sprintf(
        "FD estimate using a single cohort (2003) approx. a %s percent change",
        fmt_fixed_decimal(
            input = fd_cohort_pct_change,
            should_be_rounded = TRUE,
            round_digits = 0L,
            decimal_digits = 0L,
            scale_factor = 100,
            add_comma = FALSE
        )
    )
)

# Housekeeping
fd_cohort_levels <- NULL
fd_cohort_pre_line_val <- NULL
fd_cohort_post_line_val <- NULL
fd_cohort_pct_change <- NULL
figure_3_1_a <- NULL
rm(
    fd_cohort_levels,
    fd_cohort_pre_line_val,
    fd_cohort_post_line_val,
    fd_cohort_pct_change,
    figure_3_1_a
)

# Produce Figure 3.1b - pooled FD estimator using all cohorts

fd_pooled_levels <-
    setDT(
        readRDS("~/estimation-output/pooled-fd-results.rds")
        )[estimate_type == "level"]

fd_pooled_pre_line_val <- fd_pooled_levels[!is.na(pre_line)]$pre_line[[1]]
fd_pooled_post_line_val <- fd_pooled_levels[!is.na(post_line)]$post_line[[1]]

fd_pooled_pct_change <-
    (fd_pooled_post_line_val - fd_pooled_pre_line_val) / fd_pooled_pre_line_val

figure_3_1_b <-
    ggplot(data = fd_pooled_levels, aes(x = event_time, y = estimate)) +
    geom_line() +
    geom_line(
        aes(y = pre_line),
        linetype = "dotted",
        show.legend = FALSE,
        color = "blue"
    ) +
    geom_line(
        aes(y = post_line),
        linetype = "dotted",
        show.legend = FALSE,
        color = "blue"
    ) +
    theme_bw(base_size = 11) +
    theme(
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank()
    ) +
    scale_x_continuous(
        breaks = seq.int(from = -7L, to = 5L, by = 1L),
        expand = expansion(mult = c(0.0025, 0.0025))
    ) +
    scale_y_continuous(
        breaks = seq.int(from = 27000L, to = 36000L, by = 1000L)
    ) +
    labs(
        x = "Event Time (ℓ)",
        y = "Mean Winner Wage Earnings (2016 USD)"
    ) +
    coord_cartesian(ylim = c(27000, 36000)) +
    annotation_custom(
        BracketsGrob(
            0.5825, 0.22, 0.5825, 0.5935,
            h = -0.05,
            lwd = 0.5,
            col = "blue"
        )
    ) +
    annotate(
        "text",
        x = 1.125,
        y = 30600,
        size = 3L,
        label =
            sprintf(
                "FD Estimate\n(Pooled)\n%s",
                round(
                    (fd_pooled_post_line_val - fd_pooled_pre_line_val),
                    digits = 0L
                )
            )
    )

ggsave(
    plot = figure_3_1_b,
    filename = "~/paper/figures/figure-3.1b.png",
    width = 6,
    height = 4,
    dpi = 600
)

ggsave(
    plot = figure_3_1_b,
    filename = "~/paper/figures/figure-3.1b.tif",
    width = 6,
    height = 4,
    device = "tiff",
    dpi = 600
)

print(
    sprintf(
        "Pooled FD estimate approx. a %s percent change",
        fmt_fixed_decimal(
            input = fd_pooled_pct_change,
            should_be_rounded = TRUE,
            round_digits = 0L,
            decimal_digits = 0L,
            scale_factor = 100,
            add_comma = FALSE
        )
    )
)

# Housekeeping
fd_pooled_levels <- NULL
fd_pooled_pre_line_val <- NULL
fd_pooled_post_line_val <- NULL
fd_pooled_pct_change <- NULL
figure_3_1_b <- NULL
rm(
    fd_pooled_levels,
    fd_pooled_pre_line_val,
    fd_pooled_post_line_val,
    fd_pooled_pct_change,
    figure_3_1_b
)

# Produce Figure 3.1c - DiD estimator using later winners as control group

did_laterwinners_levels <-
    setDT(readRDS("~/estimation-output/estimates-for-figure-3-1-c.rds"))

did_laterwinners_pre_line_c_val <-
    did_laterwinners_levels[!is.na(pre_line_c)]$pre_line_c[[1]]
did_laterwinners_post_line_c_val <-
    did_laterwinners_levels[!is.na(post_line_c)]$post_line_c[[1]]
did_laterwinners_pre_line_t_val <-
    did_laterwinners_levels[!is.na(pre_line_t)]$pre_line_t[[1]]
did_laterwinners_post_line_t_val <-
    did_laterwinners_levels[!is.na(post_line_t)]$post_line_t[[1]]

did_laterwinners_pct_change <-
    (
        (did_laterwinners_post_line_t_val - did_laterwinners_post_line_c_val) -
        (did_laterwinners_pre_line_t_val - did_laterwinners_pre_line_c_val)
    ) /
    did_laterwinners_post_line_t_val


figure_3_1_c <-
    ggplot(
        data = did_laterwinners_levels,
        aes(x = ref_event_time, y = estimate, color = treat_label)
    ) +
    geom_line() +
    geom_line(aes(y = pre_line_t), linetype = "dotted", color = "blue") +
    geom_line(aes(y = pre_line_c), linetype = "dotted", color = "blue") +
    geom_line(aes(y = post_line_t), linetype = "dotted", color = "blue") +
    geom_line(aes(y = post_line_c), linetype = "dotted", color = "blue") +
    theme_bw(base_size = 11) +
    theme(
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        legend.text = element_text(size = 9)
    ) +
    scale_x_continuous(
        breaks = seq.int(from = -7L, to = 5L, by = 1L),
        expand = expansion(mult = c(0.0025, 0.0025))
    ) +
    scale_y_continuous(
        breaks = seq.int(from = 27000L, to = 36000L, by = 1000L)
    ) +
    labs(
        x = "Event Time (ℓ)",
        y = "Mean Winner Wage Earnings (2016 USD)"
    ) +
    coord_cartesian(ylim = c(27000, 36000)) +
    scale_color_manual(values = c(grey(0), grey(0.75))) +
    theme(
        legend.title = element_blank(),
        legend.position = c(0.83, 0.93325),
        legend.background = element_rect(fill = "white", color = NA),
        strip.text.x = element_blank(),
        strip.background = element_blank(),
        legend.key = element_rect(NA),
        legend.spacing.y = unit(2, "mm"),
        legend.key.height = unit(4, "mm"),
        legend.key.width = unit(4, "mm"),
        legend.margin = margin(t = 0, r = 0.1, b = 0.15, l = 0.1, unit = "cm")
    ) +
    annotation_custom(
        BracketsGrob(
            0.58, 0.6675, 0.58, 0.695,
            h = -0.05,
            lwd = 0.5,
            col = "blue"
        )
    ) +
    annotate(
        "text",
        x = 2.6,
        y = 34000,
        size = 3L,
        label =
            sprintf(
                "FD Control\n(Later Winners)\n%s",
                round(
                    (did_laterwinners_post_line_c_val - did_laterwinners_pre_line_c_val), #nolint
                    digits = 0L
                )
            )
    ) +
    annotate(
        "segment",
        x = 1.65,
        y = 33900,
        xend = 0.45,
        yend = 33350,
        linewidth = 0.3,
        linetype = "dotted",
        color = "blue",
        arrow = arrow(type = "closed", length = unit(0.02, "npc"))
    ) +
    annotation_custom(
        BracketsGrob(
            0.58, 0.40, 0.58, 0.81,
            h = 0.05,
            lwd = 0.5,
            col = "blue"
        )
    ) +
    annotate(
        "text",
        x = -1.75,
        y = 32550,
        size = 3L,
        label =
            sprintf(
                "FD Treated\n(Current Winners)\n%s",
                round(
                    (did_laterwinners_post_line_t_val - did_laterwinners_pre_line_t_val), #nolint
                    digits = 0L
                )
            )
    ) +
    annotate(
        "text",
        x = -4.5,
        y = 28000,
        size = 3L,
        label = "DiD Estimate\n(Later Winners)"
    ) +
    annotate(
        "text",
        x = -3,
        y = 28000,
        size = 3L,
        label = "="
    ) +
    annotate(
        "text",
        x = -1.5,
        y = 28000,
        size = 3L,
        label = "FD Treated\n(Current Winners)"
    ) +
    annotate(
        "text",
        x = 0,
        y = 28000,
        size = 3L,
        label = "-"
    ) +
    annotate(
        "text",
        x = 1.5,
        y = 28000,
        size = 3L,
        label = "FD Control\n(Later Winners)"
    ) +
    annotate(
        "text",
        x = 2.75,
        y = 28000,
        size = 3L,
        label = "="
    ) +
    annotate(
        "text",
        x = 3.65,
        y = 28000,
        size = 3L,
        label =
            sprintf(
                "%s",
                round(
                    (did_laterwinners_post_line_t_val - did_laterwinners_pre_line_t_val), #nolint
                    digits = 0
                ) -
                round(
                    (did_laterwinners_post_line_c_val - did_laterwinners_pre_line_c_val), #nolint
                    digits = 0
                )
            )
    )

ggsave(
    plot = figure_3_1_c,
    filename = "~/paper/figures/figure-3.1c.png",
    width = 6,
    height = 4,
    dpi = 600
)

ggsave(
    plot = figure_3_1_c,
    filename = "~/paper/figures/figure-3.1c.tif",
    width = 6,
    height = 4,
    device = "tiff",
    dpi = 600
)

print(
    sprintf(
        "DiD estimate (using later winners) approx. a %s percent change",
        fmt_fixed_decimal(
            input = did_laterwinners_pct_change,
            should_be_rounded = TRUE,
            round_digits = 0L,
            decimal_digits = 0L,
            scale_factor = 100,
            add_comma = FALSE
        )
    )
)

# Housekeeping
did_laterwinners_levels <- NULL
did_laterwinners_pre_line_c_val <- NULL
did_laterwinners_pre_line_t_val <- NULL
did_laterwinners_post_line_c_val <- NULL
did_laterwinners_post_line_t_val <- NULL
did_laterwinners_pct_change <- NULL
figure_3_1_c <- NULL
rm(
    did_laterwinners_levels,
    did_laterwinners_pre_line_c_val,
    did_laterwinners_pre_line_t_val,
    did_laterwinners_post_line_c_val,
    did_laterwinners_post_line_t_val,
    did_laterwinners_pct_change,
    figure_3_1_c
)

# Produce Figure 3.1d - DiD estimator using non-winners as control group

did_nonwinners_levels <-
    setDT(readRDS("~/estimation-output/estimates-for-figure-3-1-d.rds"))

did_nonwinners_pre_line_c_val <-
    did_nonwinners_levels[!is.na(pre_line_c)]$pre_line_c[[1]]
did_nonwinners_post_line_c_val <-
    did_nonwinners_levels[!is.na(post_line_c)]$post_line_c[[1]]
did_nonwinners_pre_line_t_val <-
    did_nonwinners_levels[!is.na(pre_line_t)]$pre_line_t[[1]]
did_nonwinners_post_line_t_val <-
    did_nonwinners_levels[!is.na(post_line_t)]$post_line_t[[1]]

did_nonwinners_pct_change <-
    (
        (did_nonwinners_post_line_t_val - did_nonwinners_post_line_c_val) -
        (did_nonwinners_pre_line_t_val - did_nonwinners_pre_line_c_val)
    ) /
    did_nonwinners_post_line_t_val


figure_3_1_d <-
    ggplot(
        data = did_nonwinners_levels,
        aes(x = ref_event_time, y = estimate, color = treat_label)
    ) +
    geom_line() +
    geom_line(aes(y = pre_line_t), linetype = "dotted", color = "blue") +
    geom_line(aes(y = pre_line_c), linetype = "dotted", color = "blue") +
    geom_line(aes(y = post_line_t), linetype = "dotted", color = "blue") +
    geom_line(aes(y = post_line_c), linetype = "dotted", color = "blue") +
    theme_bw(base_size = 11) +
    theme(
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        legend.text = element_text(size = 9)
    ) +
    scale_x_continuous(
        breaks = seq.int(from = -7L, to = 5L, by = 1L),
        expand = expansion(mult = c(0.0025, 0.0025))
    ) +
    scale_y_continuous(
        breaks = seq.int(from = 27000L, to = 36000L, by = 1000L)
    ) +
    labs(
        x = "Event Time (ℓ)",
        y = "Mean Winner Wage Earnings (2016 USD)"
    ) +
    coord_cartesian(ylim = c(27000, 36000)) +
    scale_color_manual(values = c(grey(0), grey(0.75))) +
    theme(
        legend.title = element_blank(),
        legend.position = c(0.83, 0.93325),
        legend.background = element_rect(fill = "white", color = NA),
        strip.text.x = element_blank(),
        strip.background = element_blank(),
        legend.key = element_rect(NA),
        legend.spacing.y = unit(2, "mm"),
        legend.key.height = unit(4, "mm"),
        legend.key.width = unit(4, "mm"),
        legend.margin = margin(t = 0, r = 0.1, b = 0.15, l = 0.1, unit = "cm")
    ) +
    annotation_custom(
        BracketsGrob(
            0.58, 0.6325, 0.58, 0.715,
            h = -0.05,
            lwd = 0.5,
            col = "blue"
        )
    ) +
    annotate(
        "text",
        x = 2.6,
        y = 34000,
        size = 3L,
        label =
            sprintf(
                "FD Control\n(Non-Winners)\n%s",
                round(
                    (did_nonwinners_post_line_c_val - did_nonwinners_pre_line_c_val), #nolint
                    digits = 0L
                )
            )
    ) +
    annotate(
        "segment",
        x = 1.65,
        y = 33900,
        xend = 0.45,
        yend = 33350,
        linewidth = 0.3,
        linetype = "dotted",
        color = "blue",
        arrow = arrow(type = "closed", length = unit(0.02, "npc"))
    ) +
    annotation_custom(
        BracketsGrob(
            0.58, 0.42, 0.58, 0.8825,
            h = 0.05,
            lwd = 0.5,
            col = "blue"
        )
    ) +
    annotate(
        "text",
        x = -1.75,
        y = 32550,
        size = 3L,
        label =
            sprintf(
                "FD Treated\n(Current Winners)\n%s",
                round(
                    (did_nonwinners_post_line_t_val - did_nonwinners_pre_line_t_val), #nolint
                    digits = 0L
                )
            )
    ) +
    annotate(
        "text",
        x = -4.5,
        y = 28000,
        size = 3L,
        label = "DiD Estimate\n(Non-Winners)"
    ) +
    annotate(
        "text",
        x = -3,
        y = 28000,
        size = 3L,
        label = "="
    ) +
    annotate(
        "text",
        x = -1.5,
        y = 28000,
        size = 3L,
        label = "FD Treated\n(Current Winners)"
    ) +
    annotate(
        "text",
        x = 0,
        y = 28000,
        size = 3L,
        label = "-"
    ) +
    annotate(
        "text",
        x = 1.5,
        y = 28000,
        size = 3L,
        label = "FD Control\n(Non-Winners)"
    ) +
    annotate(
        "text",
        x = 2.75,
        y = 28000,
        size = 3L,
        label = "="
    ) +
    annotate(
        "text",
        x = 3.65,
        y = 28000,
        size = 3L,
        label =
            sprintf(
                "%s",
                round(
                    (did_nonwinners_post_line_t_val - did_nonwinners_pre_line_t_val), #nolint
                    digits = 0
                ) -
                round(
                    (did_nonwinners_post_line_c_val - did_nonwinners_pre_line_c_val),  #nolint
                    digits = 0
                )
            )
    )

ggsave(
    plot = figure_3_1_d,
    filename = "~/paper/figures/figure-3.1d.png",
    width = 6,
    height = 4,
    dpi = 600
)

ggsave(
    plot = figure_3_1_d,
    filename = "~/paper/figures/figure-3.1d.tif",
    width = 6,
    height = 4,
    device = "tiff",
    dpi = 600
)

print(
    sprintf(
        "DiD estimate (using non-winners) approx. a %s percent change",
        fmt_fixed_decimal(
            input = did_nonwinners_pct_change,
            should_be_rounded = TRUE,
            round_digits = 0L,
            decimal_digits = 0L,
            scale_factor = 100,
            add_comma = FALSE
        )
    )
)

# Housekeeping
did_nonwinners_levels <- NULL
did_nonwinners_pre_line_c_val <- NULL
did_nonwinners_pre_line_t_val <- NULL
did_nonwinners_post_line_c_val <- NULL
did_nonwinners_post_line_t_val <- NULL
did_nonwinners_pct_change <- NULL
figure_3_1_d <- NULL
rm(
    did_nonwinners_levels,
    did_nonwinners_pre_line_c_val,
    did_nonwinners_pre_line_t_val,
    did_nonwinners_post_line_c_val,
    did_nonwinners_post_line_t_val,
    did_nonwinners_pct_change,
    figure_3_1_d
)